/*--------------------------------------------------------------------------------
*      							Guinea Final Analysis
Author: 			Jacobus Cilliers
Last date modified:	January 16 2022
Description: 		Replicates main tables for the paper "Heterogenous Teacher Effects of Two Incentive Schemes: Evidence from a Low-Income Country"
 
*--------------------------------------------------------------------------------*/
*--------------------------------------------------------------------------------
* Set the critical parameters of the computing environment.
*--------------------------------------------------------------------------------

* Clear all computer memory and delete any existing stored graphs and matrices:
clear all
clear matrix
set more off

* The tables below are stored and exported with eststo and esttab, which come
* from the user-written estout package. Stop early with a clear message if the
* package is not installed, rather than failing after the first regression.
capture which esttab
if _rc {
	display as error "esttab not found. Install the estout package first: ssc install estout"
	exit 199
}

*Define globals (Note: you need to set your own global for the folder path)
if c(username) == "jacob" { 
	global root "C:\Users\jacob\Dropbox\guinea_teacher_incentives\replication" 
	}

* Stop if no root folder is defined for this user. Without this check every path
* below would silently start at the drive root and the first use command would
* fail with an uninformative file-not-found error.
if "$root" == "" {
	display as error "Global root is not set. Add your own replication folder path to the block above before running this do-file."
	exit 198
}

	* Sub-folders derived from the root: input data, exported tables, exported figures.
	global final "$root/data/final"
	global tables "$root/output/tables"
	global figs "$root/output/figures"	
	
	
**************************************	
*** Load and prepare the data     ***
**************************************



* Load the student-level data. A forward slash is used as the directory separator
* because Stata accepts it on every platform, whereas a backslash only works on Windows.
use "$final/student_all_years.dta", clear

	**The following lines reshape the data long, so that there are two observations per individual - one for language and one for math.
	*Rename learning outcomes so can perform a reshape (suffix 1 = language, suffix 2 = math)
	ren langirtscore irt1
	ren mathirtscore irt2
	forvalues i = 2/ 3 { 
		ren base_mean_langirtscore_g`i' base_mean_g`i'_irt1
		ren base_mean_mathirtscore_g`i' base_mean_g`i'_irt2
	}
	*Keep a single baseline-missing indicator per grade: the math and language versions are duplicates.
	ren base_mean_mathirtscore_g3_mis base_g3_mis
	drop base_mean_langirtscore_g3_mis //This and the above variable are exactly the same. 
	ren base_mean_mathirtscore_g2_mis base_g2_mis 
	drop base_mean_langirtscore_g2_mis //This and the above variable are exactly the same. 	
	*Create temporary ID so can reshape
	gen tempid = _n
	*Reshape the data. After this step subject takes the value 1 for language and 2 for math.
	reshape long irt base_mean_g2_irt base_mean_g3_irt, i(tempid) j(subject)
	

**Mean-impute missing control values, so that no observations are lost when the controls are added to the regressions.
		* For each control: flag the missing observations in a companion _mis dummy,
		* then fill them with the mean among observations where treat equals 0.
		foreach ctrl of varlist student_age female HT_age HT_female {
			generate `ctrl'_mis = `ctrl' == .
			quietly summarize `ctrl' if treat == 0
			replace `ctrl' = `r(mean)' if `ctrl' == .
		}
	
**************************************	
*** Table 1: Main treatment effect ***
**************************************

	* Controls shared by every Table 1 regression, listed in the original order.
	local covars female female_mis  student_age student_age_mis base_mean_* base_g3_mis base_g2_mis HT_age* HT_female*

	* Column for grade 2 in period 1, weighted by the inverse of students_per_school.
	* Each column stores two extra scalars: p1 (p-value of the test T1 = T2) and
	* m (mean of irt among treat == 0 observations in period 0).
	quietly regress irt T1 T2 i.strat  i.grade i.subject `covars'  ///
			if period  == 1  & grade == 2 [pweight = 1/students_per_school], vce(cluster code_ecole)
	quietly test T1 = T2
	local pval `r(p)'
	* Control mean restricted to observations flagged by gr2round1.
	quietly summarize irt if treat == 0 & period == 0 & gr2round1
	local ctrlmean = `r(mean)'
	eststo round1_gr2, addscalars(p1 `pval' m `ctrlmean')

	forvalues rnd = 1/2 { //period 1 = midline, period 2 = endline
		* Pooled grades above 2.
		quietly regress irt T1 T2 i.strat  i.grade i.subject `covars'  ///
			if period  == `rnd'  & grade > 2 [pweight = 1/students_per_school], vce(cluster code_ecole)
		quietly test T1 = T2
		local pval `r(p)'
		* Control mean restricted to observations flagged for both grade 3 and grade 4 in this round.
		quietly summarize irt if treat == 0 & period == 0 & gr3round`rnd' & gr4round`rnd'
		local ctrlmean = `r(mean)'
		eststo round`rnd'_full, addscalars(p1 `pval' m `ctrlmean')

		* Grade-specific columns: no grade fixed effect, weights use students_per_grade.
		forvalues gr = 3/4 {
			quietly regress irt T1 T2 i.strat i.subject `covars'  ///
				if period  == `rnd'  & grade == `gr' [pweight = 1/students_per_grade], vce(cluster code_ecole)
			quietly test T1 = T2
			local pval `r(p)'
			quietly summarize irt if treat == 0 & period == 0 & gr`gr'round`rnd'
			local ctrlmean = `r(mean)'
			eststo round`rnd'_grade`gr', addscalars(p1 `pval' m `ctrlmean')
		}

		* Subject-specific columns (1 and 2): no subject fixed effect.
		* NOTE(review): the control mean here is not restricted by the gr3round and
		* gr4round flags used for the pooled column - confirm this is intended.
		forvalues subj = 1/2 {
			quietly regress irt T1 T2 i.strat  i.grade `covars'  ///
				if period  == `rnd'  & subject == `subj' & grade > 2 [pweight = 1/students_per_school], vce(cluster code_ecole)
			quietly test T1 = T2
			local pval `r(p)'
			quietly summarize irt if treat == 0 & period == 0 & subject == `subj' & grade > 2
			local ctrlmean = `r(mean)'
			eststo round`rnd'_subject`subj', addscalars(p1 `pval' m `ctrlmean')
		}
	}

	* Export Table 1. The fourth format in fmt() is written out explicitly; esttab
	* would otherwise reuse the last listed format for the remaining statistic.
	esttab 	round1_full round2_full round1_gr2 round1_grade3 round2_grade3 round1_grade4  round2_grade4 round1_subject1 round2_subject1 round1_subject2 round2_subject2 ///
				using "$tables/Table1.tex", replace ///
				tex label se(3) b(3) alignment(cl) r2 substitute(\_ _ $ \$ ) ///
				stats(m N r2 p1,  fmt(3 0 3 3) labels("Control mean" "Observations" "R-squared" "Test:In-Kind=Recognition")) ///
				keep(T1 T2) ///
				collabels(none)	 ///
				mgroups("Full sample" "Grade 2"	 "Grade 3"	"Grade 4"	"French"	"Math", pattern(1 0 1 1 0 1 0 1 0 1 0) ///
				prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
				mtitles("Mid" "End" "Mid" "Mid" "End" "Mid" "End" "Mid" "End" "Mid" "End") ///
				nodepvars nonotes  star(  * 0.1  **  0.05 *** 0.01) 								

				
**********************************************
*** Table 2. By teacher and student gender *** 	
**********************************************

	
	* Start from an empty set of stored estimates so Table 1 results do not leak into Table 2.
	eststo clear
	*Main treatment effects with split sample: male vs female teacher
	* Three columns per variable: all observations with a non-missing 0/1 value,
	* then the value-1 subsample, then the value-0 subsample. Each column stores
	* p1, the p-value of the test T1 = T2.
	foreach var of varlist teach_female  {

	qui reg irt T1 T2 i.strat  i.grade i.subject female female_mis  student_age student_age_mis base_mean_* base_g?_mis HT_age* HT_female*  ///
			if period  == 1  & grade > 2 & (`var' == 1 | `var' == 0) [pweight = 1/students_per_school], cluster(code_ecole)			
				qui test T1 = T2
				local p1 `r(p)' 	
				eststo full_`var', addscalars(p1 `p1')		

	qui reg irt T1 T2 i.strat  i.grade i.subject female female_mis  student_age student_age_mis base_mean_* base_g?_mis HT_age* HT_female*  ///
			if period  == 1  & grade > 2 & `var' == 1  [pweight = 1/students_per_school], cluster(code_ecole)			
				qui test T1 = T2
				local p1 `r(p)' 	
				eststo `var'_yes, addscalars(p1 `p1')		
			
	qui reg irt T1 T2 i.strat  i.grade i.subject female female_mis  student_age student_age_mis base_mean_* base_g?_mis HT_age* HT_female*  ///
			if period  == 1  & grade > 2 & `var' == 0  [pweight = 1/students_per_school], cluster(code_ecole)						
				qui test T1 = T2
				local p1 `r(p)' 	
				eststo `var'_no, addscalars(p1 `p1')		
	}

	*Same regressions as above, but interacted with student gender
	*We want female to be the reference group, so create a boy dummy variable. 
	gen boy = female == 0
	* female was mean-imputed earlier, so set boy to missing wherever gender was originally missing.
	replace boy = . if female_mis == 1
	*Interact with treatment. 
	forvalues i = 1/2 {
		gen boy_x_T`i' = boy * T`i'
	}
	*label variables. Also label the interactions as the coefficients, so that the F-tests can also be defined in terms of these coefficients. 
	lab var boy "Boy"
	lab var boy_x_T1 "In-Kind x Boy ($\hat{\alpha_3}$)"
	lab var boy_x_T2 "Recognition x Boy ($\hat{\alpha_4}$)"	
	lab var T1 "In-Kind ($\hat{\alpha_1}$)"
	lab var T2 "Recognition ($\hat{\alpha_2}$)"
	
	*Same regressions as above, but now interacted with student gender. 
	* Scalars stored with each column:
	*   p1      p-value of T1 = T2
	*   boy_t1  p-value of T1 + boy_x_T1 = 0
	*   boy_t2  p-value of T2 + boy_x_T2 = 0
	*   dif_boy p-value of T1 + boy_x_T1 = T2 + boy_x_T2
	* NOTE(review): unlike the split-sample regressions above, the interacted
	* regressions do not include the baseline-missing indicators base_g2_mis and
	* base_g3_mis (the base_mean_ wildcard does not match them). Left unchanged so
	* that the estimates are not altered - confirm this is intended.
	* NOTE(review): full_interact_whole_sample is stored but is not one of the
	* columns exported by the esttab call below.
	qui 	reg irt T1 T2 boy boy_*  female_mis*  i.strat  i.grade i.subject student_age student_age_mis base_mean_* HT_age* HT_female*  ///
			if period  == 1 & grade > 2  [pweight = 1/students_per_school], cluster(code_ecole)		
			qui test T1 = T2
			local p1 `r(p)' 	
			qui test T1 + boy_x_T1  = 0 
			local boy_t1 `r(p)' 	
			qui test T2 + boy_x_T2  = 0
			local boy_t2 `r(p)' 	
			qui test (T1 + boy_x_T1 ) = (T2 + boy_x_T2)
			local dif_boy `r(p)' 	
			eststo full_interact_whole_sample, addscalars(boy_t1 `boy_t1' boy_t2 `boy_t2' p1 `p1' dif_boy `dif_boy' )		

	* All teachers with a non-missing 0/1 teach_female value. The _merge variable is
	* already in the loaded data; presumably it marks a matched teacher record - verify.
	qui reg irt T1 T2 boy boy_*  female_mis* i.strat  i.grade i.subject student_age student_age_mis base_mean_* HT_age* HT_female*  ///
			if period  == 1 & grade > 2 & _merge == 3 & (teach_female == 1 | teach_female == 0)  [pweight = 1/students_per_school], cluster(code_ecole)		
			qui test T1 = T2
			local p1 `r(p)' 	
			qui test T1 + boy_x_T1 = 0
			local boy_t1 `r(p)' 	
			qui test T2 + boy_x_T2  = 0
			local boy_t2 `r(p)' 	
			qui test (T1 + boy_x_T1 ) = (T2 + boy_x_T2)
			local dif_boy `r(p)' 	
			eststo full_interact, addscalars(boy_t1 `boy_t1' boy_t2 `boy_t2' p1 `p1' dif_boy `dif_boy' )		
			

	* Female teachers only.
	qui reg irt T1 T2 boy boy_*  female_mis* i.strat  i.grade i.subject    student_age student_age_mis base_mean_* HT_age* HT_female*  ///
			if period  == 1 & grade > 2 & _merge == 3 & teach_female == 1 [pweight = 1/students_per_school], cluster(code_ecole)		
			qui test T1 = T2
			local p1 `r(p)' 	
			qui test T1 + boy_x_T1  = 0
			local boy_t1 `r(p)' 	
			qui test T2 + boy_x_T2  = 0
			local boy_t2 `r(p)' 	
			qui test (T1 + boy_x_T1 ) = (T2 + boy_x_T2)
			local dif_boy `r(p)' 	
			eststo female_interact, addscalars(boy_t1 `boy_t1' boy_t2 `boy_t2' p1 `p1' dif_boy `dif_boy' )		

	* Male teachers only. The tests are run quietly, like in every other column.
	qui 	reg irt T1 T2 boy boy_*  female_mis* i.strat  i.grade i.subject    student_age student_age_mis base_mean_* HT_age* HT_female*  ///
			if period  == 1 & grade > 2 & _merge == 3 & teach_female == 0 [pweight = 1/students_per_school], cluster(code_ecole)					
			qui test T1 = T2
			local p1 `r(p)' 	
			qui test T1 + boy_x_T1  = 0
			local boy_t1 `r(p)' 	
			qui test T2 + boy_x_T2  = 0
			local boy_t2 `r(p)' 	
			qui test (T1 + boy_x_T1 ) = (T2 + boy_x_T2)
			local dif_boy `r(p)' 	
			eststo male_interact, addscalars(boy_t1 `boy_t1' boy_t2 `boy_t2' p1 `p1' dif_boy `dif_boy' )		
		
	*Export tables. 	
	* One format per statistic: six statistics, six formats.
	esttab 	full_teach_female teach_female_yes teach_female_no ///
			full_interact  female_interact male_interact  ///
			using "$tables/table2.tex", replace ///
			tex label se(3) b(3) alignment(cl) r2 substitute(\_ _ $ \$ ) ///
			stats(N r2 p1 dif_boy  boy_t1 boy_t2 ,  fmt(0 3 3 3 3 3) ///
			labels("Observations" "R-squared"  "$\hat{\alpha_1}=\hat{\alpha_2}$" "$\hat{\alpha_1}+\hat{\alpha_3}=\hat{\alpha_2}+\hat{\alpha_4}$" ///
						"$\hat{\alpha_1}+\hat{\alpha_3}=0$" "$\hat{\alpha_2}+\hat{\alpha_4}=0$")) ///	
			keep(T1 T2 boy_x_T1 boy_x_T2 boy) ///
			collabels(none)	 ///
			mtitles("All teachers" "Female" "Male" "All teachers" "Female" "Male" ) ///
			nodepvars nonotes  star(  * 0.1  **  0.05 *** 0.01) 								
	/*Note: I manually add the following code to the exported table .tex file: 
	\multicolumn{7}{l}{\textit{F-tests}} \\
	\hline

	*/
	


************************************************
**** Table 4. Interaction with Ebola		****
************************************************
*Merge in the Ebola data
* Many student rows per school match one Ebola row per school (key: code_ecole).
* The match result is stored in merge_ebola so it does not clash with the _merge
* variable already present in the student data. A forward slash is used in the
* path because Stata accepts it on every platform.
merge m:1 code_ecole using "$final/Ebola by prefecture with school code.dta", gen(merge_ebola)
lab var no_ebola "No Ebola"

**Assume that if unmatched (i.e. no data on prefectures), there were no cases by end of May. A reasonable assumption. 
* NOTE(review): every regression below also requires merge_ebola == 3, so
* unmatched observations are excluded from the estimation samples regardless of
* this imputation - confirm which of the two is intended.
gen mis_ebola_data = ebola == .
replace ebola = 0 if ebola == .
replace no_ebola = 1 if no_ebola == .

*Interact with treatment
forvalues  i = 1/2 {
	foreach var of varlist ebola  {
		gen `var'_x_T`i' = `var' * T`i'
	}
	}
	
lab var ebola "Ebola"
lab var ebola_x_T1 "Ebola x In-Kind"
lab var ebola_x_T2 "Ebola x Recognition"


**Interaction between treatment and Ebola
* NOTE(review): these regressions differ from Tables 1 and 2 in two ways, both
* left unchanged so that the estimates are not altered:
*   (1) the head-teacher gender control is matched with the HT_sex wildcard
*       instead of the HT_female wildcard, so the mean-imputed HT_female and its
*       HT_female_mis flag may not be what enters here - confirm;
*   (2) the nocons option is used together with i.strat, which still omits a
*       base stratum - confirm the intended specification.
* Each column stores p1, the p-value of the test T1 = T2 (not exported below).
eststo clear	
* Midline (period 1), grades above 2.
qui reg irt T1 T2 ebola ebola_x_T? i.strat  i.grade i.subject female female_mis student_age student_age_mis base_mean_* HT_age* HT_sex*  ///
		 if period  == 1  & grade > 2 & merge_ebola == 3 [pweight = 1/students_per_school], cluster(code_ecole)	nocons	
		qui test T1 = T2
		local p1 `r(p)' 	
		eststo midline_interact_allgrades, addscalars(p1 `p1')

* Endline (period 2), grades above 2.
qui reg irt T1 T2 ebola ebola_x_T? i.strat  i.grade i.subject female female_mis student_age student_age_mis  base_mean_* HT_age* HT_sex*  ///
		 if period  == 2  & grade > 2 & merge_ebola == 3 [pweight = 1/students_per_school], cluster(code_ecole)	nocons	
		qui test T1 = T2
		local p1 `r(p)' 	
		eststo endline_interact, addscalars(p1 `p1')
		
		
* Endline, grade 3 only.
qui reg irt T1 T2 ebola ebola_x_T? i.strat  i.grade i.subject female female_mis student_age student_age_mis  base_mean_* HT_age* HT_sex*  ///
		 if period  == 2  & grade == 3 & merge_ebola == 3 [pweight = 1/students_per_school], cluster(code_ecole)	nocons	
		qui test T1 = T2
		local p1 `r(p)' 	
		eststo endline_interact_gr3, addscalars(p1 `p1')

* Endline, grade 4 only.
qui reg irt T1 T2 ebola ebola_x_T? i.strat  i.grade i.subject female female_mis student_age student_age_mis  base_mean_* HT_age* HT_sex*  ///
		 if period  == 2  & grade == 4 & merge_ebola == 3 [pweight = 1/students_per_school], cluster(code_ecole)	nocons	
		qui test T1 = T2
		local p1 `r(p)' 	
		eststo endline_interact_gr4, addscalars(p1 `p1')
		
	* Export the four columns: midline full sample, then endline full, grade 3 and grade 4.
	esttab midline_interact_allgrades endline_interact endline_interact_gr3 endline_interact_gr4 ///
		using "$tables/learning_by_ebola.tex", replace ///
		tex label se(3) b(3) alignment(cl) r2 substitute(\_ _ $ \$ ) ///
		stats(N r2,  fmt(0 3) labels("Observations" "R-squared")) ///	
		keep(T1 T2 ebola ebola_x_T1 ebola_x_T2) ///
		collabels(none)	 ///
		mgroups("Midline" "Endline" , pattern(1 1 0 0) ///
		prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
		mtitles("Full" "Full" "Grade 3" "Grade 4" ) ///
		nodepvars nonotes  star(  * 0.1  **  0.05 *** 0.01) 								



************************************************
**** Table 3. Inspection-level outcomes at midline		****
************************************************
		
*Open the midline inspection data. 
* This replaces the student data in memory. A forward slash is used in the path
* because Stata accepts it on every platform.
use "$final/inspection_clean_2013.dta", clear

eststo clear

* One regression per outcome: the two indices and every z_ component.
* Each stored result carries p1 (p-value of the test T1 = T2) and
* m (mean of the outcome among observations with T0 == 1).
foreach var of varlist class_index 	teach_index z_* {
	qui reg `var' T1 T2 i.strat, robust
	* Run quietly, like the tests in the other tables; the p-value is exported via p1.
	qui test T1=T2
	local p1 `r(p)'
	qui sum `var' if T0==1
	local m `r(mean)'
	eststo `var', addscalars(p1 `p1' m `m')
}

**Create a table for midline inspection level data:

esttab class_index z_spacial z_edu_doc ///
	teach_index z_total_writtenprep z_total_teachingmaterial z_total_class_practice z_total_reflection  ///
	using "$tables/inspection_midline_outcomes.tex", replace ///
	tex label se(3) b(3) alignment(cl) r2 substitute(\_ _ $ \$ ) ///
	keep(T1 T2) ///
	collabels(none) ///
	stats(m N r2 p1, fmt(3 0 3 3) labels("Control mean" "Observations" "R-squared" "Test:In-Kind=Recognition")) ///
	mgroups("Classroom quality" "Teaching quality" , pattern(1 0 0 1 0 0 0 0) ///
	prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
	mtitles("Overall" "\shortstack{Spatial\\org.}" "\shortstack{Educ.\\documents}" "Overall" "\shortstack{Written\\prep}" "\shortstack{Teaching\\material}" "\shortstack{Class\\practice}" "Reflection") ///
	nodepvars nonotes nocons star( * 0.1 ** 0.05 *** 0.01)
	

/* For those interested, the mean indices were constructed using the following code: 

*--------------------------------------------------------------------------------
* Constructing variables for regressions
*--------------------------------------------------------------------------------               
gen std_spacial = spacial/6
gen std_edu_doc = edu_doc/8
gen std_spacial_edu_doc = spacial_edu_doc/14
gen std_total_writtenprep = total_writtenprep/22
gen std_total_teachingmaterial = total_teachingmaterial/16
gen std_total_class_practice = total_class_practice/42
gen std_total_reflection = total_reflection/12
gen std_total_lesson = total_lesson/92
gen std_total_total = total_total/106

**Rather take the standardized means of the different domains. 
foreach var of varlist spacial edu_doc  {
	qui sum `var' if treat == 0
	gen z_`var' = (`var'-`r(mean)')/`r(sd)'
		}
egen class_index = rmean(z_spacial z_edu_doc)		
		
foreach var of varlist total_writtenprep total_teachingmaterial total_class_practice total_reflection {
	qui sum `var' if treat == 0
	gen z_`var' = (`var'-`r(mean)')/`r(sd)'
		}
egen teach_index = rmean(z_total_writtenprep z_total_teachingmaterial z_total_class_practice z_total_reflection)		


*/
	
	
********************
**** Done!		****
********************
	
	
		